datadir="/Volumes/la420/home/CUTnTAG/antibodies/bowtie2_summary/"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729")
## collect the alignment results from the bowtie2 alignment summary files
alignResult = c()
for(sample in sampleList){
alignRes = read.table(paste0(datadir, sample, "_trimmed_bowtie2.txt"), header = FALSE, fill = TRUE)
alignRate = substr(alignRes$V1[6], 1, nchar(as.character(alignRes$V1[6]))-1)
alignResult = data.frame(Antibody = sample, SequencingDepth = alignRes$V1[1] %>% as.character %>% as.numeric,
MappedFragNum_hg19 = alignRes$V1[4] %>% as.character %>% as.numeric + alignRes$V1[5] %>% as.character %>% as.numeric,
AlignmentRate_hg19 = alignRate %>% as.numeric) %>% rbind(alignResult, .)
}
alignResult %>% mutate(AlignmentRate_hg19 = paste0(AlignmentRate_hg19, "%"))## summarize the duplication information from the picard summary outputs
picarddir = "/Volumes/la420/home/CUTnTAG/antibodies/picard_summary/"
dupResult = c()
for(sample in sampleList){
dupRes = read.table(paste0(picarddir, sample, "_picard.rmDup.txt"), header = TRUE, fill = TRUE)
dupResult = data.frame(Antibody = sample, MappedFragNum_hg19 = dupRes$READ_PAIRS_EXAMINED[1] %>% as.character %>% as.numeric, DuplicationRate = dupRes$PERCENT_DUPLICATION[1] %>% as.character %>% as.numeric * 100, EstimatedLibrarySize = dupRes$ESTIMATED_LIBRARY_SIZE[1] %>% as.character %>% as.numeric) %>% mutate(UniqueFragNum = MappedFragNum_hg19 * (1-DuplicationRate/100)) %>% rbind(dupResult, .)
}
alignDupSummary = left_join(alignResult, dupResult, by = c("Antibody", "MappedFragNum_hg19")) %>% mutate(DuplicationRate = paste0(DuplicationRate, "%"))
alignDupSummary## collect the fragment size information
fragdir = "/Volumes/la420/home/CUTnTAG/antibodies/fragmentLen/"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729")
fragLen = c()
for(sample in sampleList){
fragLen = read.table(paste0(fragdir, sample, "_rmDup_fragmentLen.txt"), header = FALSE) %>% mutate(fragLen = V1 %>% as.numeric, fragCount = V2 %>% as.numeric, Weight = as.numeric(V2)/sum(as.numeric(V2)), Antibody = sample) %>% rbind(fragLen, .)
}
fig5A = fragLen %>% ggplot(aes(x = Antibody, y = fragLen, weight = Weight, fill = Antibody)) +
geom_violin(bw = 5) +
scale_y_continuous(breaks = seq(0, 800, 50)) +
scale_fill_viridis(discrete = TRUE, begin = 0.1, end = 0.9, option = "magma", alpha = 0.8) +
scale_color_viridis(discrete = TRUE, begin = 0.1, end = 0.9) +
theme_bw(base_size = 20) +
ggpubr::rotate_x_text(angle = 20) +
ylab("Fragment Length") +
xlab("")
fig5B = fragLen %>% ggplot(aes(x = fragLen, y = fragCount, color = Antibody, group = Antibody)) +
geom_line(size = 1) +
scale_color_viridis(discrete = TRUE, begin = 0.1, end = 0.9, option = "magma") +
theme_bw(base_size = 20) +
xlab("Fragment Length") +
ylab("Count") +
coord_cartesian(xlim = c(0, 500))
ggarrange(fig5A, fig5B, ncol = 2)## collect the fragment size information
fragdir = "/Volumes/la420/home/CUTnTAG/antibodies_withDup/fragmentLen/"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729")
fragLen = c()
for(sample in sampleList){
fragLen = read.table(paste0(fragdir, sample, "_withDup_fragmentLen.txt"), header = FALSE) %>% mutate(fragLen = V1 %>% as.numeric, fragCount = V2 %>% as.numeric, Weight = as.numeric(V2)/sum(as.numeric(V2)), Antibody = sample) %>% rbind(fragLen, .)
}
fig5A = fragLen %>% ggplot(aes(x = Antibody, y = fragLen, weight = Weight, fill = Antibody)) +
geom_violin(bw = 5) +
scale_y_continuous(breaks = seq(0, 800, 50)) +
scale_fill_viridis(discrete = TRUE, begin = 0.1, end = 0.9, option = "viridis", alpha = 0.8) +
scale_color_viridis(discrete = TRUE, begin = 0.1, end = 0.9) +
theme_bw(base_size = 20) +
ggpubr::rotate_x_text(angle = 20) +
ylab("Fragment Length") +
xlab("")
fig5B = fragLen %>% ggplot(aes(x = fragLen, y = fragCount, color = Antibody, group = Antibody)) +
geom_line(size = 1) +
scale_color_viridis(discrete = TRUE, begin = 0.1, end = 0.9, option = "viridis") +
theme_bw(base_size = 20) +
xlab("Fragment Length") +
ylab("Count") +
coord_cartesian(xlim = c(0, 500))
ggarrange(fig5A, fig5B, ncol = 2)peakdir_S = "/Volumes/la420/home/CUTnTAG/antibodies/SEACR"
Diagenode_50x_path = file.path(peakdir_S, "Diagenode_50x_seacr_top0.01.peaks.stringent.bed")
Diagenode_50x = ChIPseeker::readPeakFile(Diagenode_50x_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)
Abcam_ab4729_path = file.path(peakdir_S, "Abcam-ab4729_seacr_top0.01.peaks.stringent.bed")
Abcam_ab4729 = ChIPseeker::readPeakFile(Abcam_ab4729_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)
Diagenode_100x_path = file.path(peakdir_S, "Diagenode_100x_seacr_top0.01.peaks.stringent.bed")
Diagenode_100x = ChIPseeker::readPeakFile(Diagenode_100x_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)
ActiveMotif_path = file.path(peakdir_S, "ActiveMotif_seacr_top0.01.peaks.stringent.bed")
ActiveMotif = ChIPseeker::readPeakFile(ActiveMotif_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)
Abcam_ab177178_path = file.path(peakdir_S, "Abcam-ab177178_seacr_top0.01.peaks.stringent.bed")
Abcam_ab177178 = ChIPseeker::readPeakFile(Abcam_ab177178_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)peakdir_S = "/Volumes/la420/home/CUTnTAG/antibodies/SEACR/"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729")
Total_CT_peaks = c()
for(sample in sampleList){
peakInfo_S = read.table(paste0(peakdir_S, sample, "_seacr_top0.01.peaks.stringent.bed"), header = FALSE, fill = TRUE) %>% mutate(width = abs(V3-V2))
Total_CT_peaks = data.frame(Total_CT_peaks = nrow(peakInfo_S), Antibody = sample) %>% rbind(Total_CT_peaks, .)
}
Total_CT_peaks %>% dplyr::select(Antibody, Total_CT_peaks)PeaksAlignmentSummary = left_join(Total_CT_peaks, alignDupSummary, by = "Antibody")
PeaksAlignmentSummary = PeaksAlignmentSummary[c("Antibody", "SequencingDepth", "MappedFragNum_hg19", "UniqueFragNum", "DuplicationRate", "Total_CT_peaks")]
PeaksAlignmentSummaryggplot(PeaksAlignmentSummary, aes(x = SequencingDepth, y = Total_CT_peaks)) + geom_point(aes(size = DuplicationRate)) + geom_text(aes(label = Antibody), nudge_y = 500, hjust = "inward")samples = list(ActiveMotif, Diagenode_100x, Diagenode_50x, Abcam_ab177178, Abcam_ab4729)
for(sample in samples){
print(GenomicRanges::width(sample) %>% summary())
}## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 74 703 917 1048 1188 24950
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 43 1046 1339 1532 1758 25072
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 106 1720 2256 2696 3136 65394
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 177 1106 1393 1653 1832 23074
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 195 2075 2736 3231 3740 66803
datadir="/Volumes/la420/home/CUTnTAG/antibodies"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729")
inPeakData = c()
for(sample in sampleList){
peakRes = read.table(paste0(datadir, "/SEACR/", sample, "_seacr_top0.01.peaks.stringent.bed"), header = FALSE, fill = TRUE)
peak.gr = GenomicRanges::GRanges(seqnames = peakRes$V1, IRanges(start = peakRes$V2, end = peakRes$V3), strand = "*")
bamFile = paste0(datadir, "/bam/", sample, "_bowtie2_rmDup.mapped.bam")
fragment_counts = getCounts(bamFile, peak.gr, paired = TRUE, by_rg = FALSE, format = "bam")
inPeakN = counts(fragment_counts)[,1] %>% sum
inPeakData = rbind(inPeakData, data.frame(inPeakN = inPeakN, Antibody = sample))
}
frip = left_join(alignDupSummary, inPeakData, by = "Antibody") %>% mutate(frip = inPeakN/UniqueFragNum * 100)
frip %>% dplyr::select(Antibody, SequencingDepth, MappedFragNum_hg19, AlignmentRate_hg19, UniqueFragNum, FragInPeakNum = inPeakN, FRiPs = frip)Remove largest peaks (in all cases, under 30 peaks per sample):
ActiveMotif_adj = data.frame(ActiveMotif)
ActiveMotif_adj = ActiveMotif_adj[ActiveMotif_adj$V4 < 50000, ] %>% GenomicRanges::GRanges()
Diagenode_100x_adj = data.frame(Diagenode_100x)
Diagenode_100x_adj = Diagenode_100x_adj[Diagenode_100x_adj$V4 < 100000, ] %>% GenomicRanges::GRanges()
Diagenode_50x_adj = data.frame(Diagenode_50x)
Diagenode_50x_adj = Diagenode_50x_adj[Diagenode_50x_adj$V4 < 400000, ] %>% GenomicRanges::GRanges()
Abcam_ab4729_adj = data.frame(Abcam_ab4729)
Abcam_ab4729_adj = Abcam_ab4729_adj[Abcam_ab4729_adj$V4 < 400000, ] %>% GenomicRanges::GRanges()
Abcam_ab177178_adj = data.frame(Abcam_ab177178)
Abcam_ab177178_adj = Abcam_ab177178_adj[Abcam_ab177178_adj$V4 < 100000, ] %>% GenomicRanges::GRanges()ChIPseeker::covplot(peak = ActiveMotif_adj,
weightCol = "V4",
title = "CUT&TAG ActiveMotif (1:100) H3K27ac peaks (SEACR)")ChIPseeker::covplot(peak = Diagenode_100x_adj,
weightCol = "V4",
title = "CUT&TAG Diagenode (1:100) H3K27ac peaks (SEACR)")ChIPseeker::covplot(peak = Diagenode_50x_adj,
weightCol = "V4",
title = "CUT&TAG Diagenode (1:50) H3K27ac peaks (SEACR)")Using ENCODE narrowPeak file with replicated H3K27ac peaks in K562 (ENCFF044JNJ)
extraCols_narrowPeak = c(signalValue = "numeric", pValue = "numeric", qValue = "numeric", peak = "integer")
ENCODE_narrowPeaks = rtracklayer::import("https://www.encodeproject.org/files/ENCFF044JNJ/@@download/ENCFF044JNJ.bed.gz", format = "BED", extraCols = extraCols_narrowPeak)Discard values of 0 (corresponding to peak ranges with 0 ENCODE overlaps) and count how many ranges (peaks) remain:
peak_list = list(ActiveMotif, Diagenode_100x, Diagenode_50x, Abcam_ab177178, Abcam_ab4729)
total_overlapping_peaks_list = c()
for(i in 1:5){
overlaps = GenomicRanges::countOverlaps(query = peak_list[[i]], subject = ENCODE_narrowPeaks)
CT_peaks_in_ENCODE = overlaps[overlaps != 0] %>% length()
total_overlapping_peaks_list = c(total_overlapping_peaks_list, CT_peaks_in_ENCODE)
}
Total_CT_peaks_falling_into_ENCODE = data.frame(Antibody = sampleList, Total_CT_peaks_falling_into_ENCODE = total_overlapping_peaks_list)
Total_CT_peaks_falling_into_ENCODEDiscard values of 0 (corresponding to peak ranges with 0 C&T overlaps) and count how many ranges (peaks) remain:
peak_list = list(ActiveMotif, Diagenode_100x, Diagenode_50x, Abcam_ab177178, Abcam_ab4729)
captured_ENCODE_list = c()
for(i in 1:5){
overlaps = GenomicRanges::countOverlaps(query = ENCODE_narrowPeaks, subject = peak_list[[i]])
ENCODE_captured_peaks = overlaps[overlaps != 0] %>% length()
captured_ENCODE_list = c(captured_ENCODE_list, ENCODE_captured_peaks)
}
Total_captured_ENCODE_peaks = data.frame(Antibody = sampleList, Total_captured_ENCODE_peaks = captured_ENCODE_list)
Total_captured_ENCODE_peaksENCODE_overlaps = Total_captured_ENCODE_peaks %>% left_join(Total_CT_peaks, by = "Antibody") %>% left_join(Total_CT_peaks_falling_into_ENCODE, by = "Antibody")
ENCODE_overlaps$Percentage_of_CT_peaks_falling_into_ENCODE = ENCODE_overlaps$Total_CT_peaks_falling_into_ENCODE / ENCODE_overlaps$Total_CT_peaks * 100
ENCODE_overlaps$Percentage_of_total_ENCODE = ENCODE_overlaps$Total_captured_ENCODE_peaks / length(ENCODE_narrowPeaks) * 100
ENCODE_overlaps = ENCODE_overlaps[c("Antibody", "Total_CT_peaks", "Total_CT_peaks_falling_into_ENCODE", "Percentage_of_CT_peaks_falling_into_ENCODE", "Total_captured_ENCODE_peaks", "Percentage_of_total_ENCODE")]
ENCODE_overlapsENCODE_overlaps$Antibody = factor(ENCODE_overlaps$Antibody, levels = ENCODE_overlaps$Antibody[order(ENCODE_overlaps$Percentage_of_total_ENCODE)])
#x_labels = c("Active Motif (1:100)", "Diagenode (1:100)", "Diagenode (1:50)", "Abcam 177178 (1:100)", "Abcam 4729 (1:100)")
ENCODE_overlap_plot = ggplot(ENCODE_overlaps) +
geom_col(aes(x = Antibody, y = 100), fill = "grey90") +
geom_col(aes(x = reorder(Antibody, Percentage_of_total_ENCODE), y = Percentage_of_total_ENCODE, fill = Antibody), show.legend = FALSE) +
ylim(0, 100) +
ylab("% of ENCODE peaks captured") +
xlab("Antibody") +
#scale_x_discrete(labels = x_labels) +
ggtitle("ENCODE peaks captured by C&T") +
theme_bw()
ENCODE_overlap_plotAbcam_ab4729_df = data.frame(Abcam_ab4729)
Abcam_ab4729_IRanges = IRanges(start = Abcam_ab4729_df$start, Abcam_ab4729_df$end)
ENCODE_narrowPeaks_df = data.frame(ENCODE_narrowPeaks)
ENCODE_narrowPeaks_IRanges = IRanges(start = ENCODE_narrowPeaks_df$start, ENCODE_narrowPeaks_df$end)
GenometriCorr::VisualiseTwoIRanges(Abcam_ab4729_IRanges, ENCODE_narrowPeaks_IRanges, nameA = "Abcam_ab4729", nameB = "ENCODE_narrowPeaks")ENCODE_overlaps$Antibody = factor(ENCODE_overlaps$Antibody, levels = ENCODE_overlaps$Antibody[order(ENCODE_overlaps$Percentage_of_total_ENCODE)])
#x_labels = c("Active Motif (1:100)", "Diagenode (1:100)", "Diagenode (1:50)", "Abcam 177178 (1:100)", "Abcam 4729 (1:100)")
Peaks_in_ENCODE_plot = ggplot(ENCODE_overlaps) +
geom_col(aes(x = Antibody, y = 100), fill = "grey90") +
geom_col(aes(x = reorder(Antibody, Percentage_of_total_ENCODE), y = Percentage_of_CT_peaks_falling_into_ENCODE, fill = Antibody), show.legend = FALSE) +
ylim(0, 100) +
ylab("% of sample peaks falling into ENCODE") +
xlab("Antibody") +
#scale_x_discrete(labels = x_labels) +
ggtitle("Proportion of C&T peaks falling into ENCODE") +
theme_bw()
Peaks_in_ENCODE_plotpromoters = ChIPseeker::getPromoters(TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene, upstream = 3000, downstream = 3000)
peak_files = list("Active Motif (1:100)" = ActiveMotif_path,
"Diagenode (1:100)" = Diagenode_100x_path,
"Diagenode (1:50)" = Diagenode_50x_path,
"Abcam 177178 (1:100)" = Abcam_ab177178_path,
"Abcam 4729 (1:100)" = Abcam_ab4729_path,
"ENCODE narrow peaks" = ENCODE_narrowPeaks)
tagMatrixList = lapply(peak_files, getTagMatrix, windows = promoters)
ChIPseeker::plotAvgProf(tagMatrixList, xlim = c(-3000, 3000), conf = 0.95, resample = 500, facet = "row")## >> Running bootstrapping for tag matrix... 2021-04-25 19:23:33
## >> Running bootstrapping for tag matrix... 2021-04-25 19:24:23
## >> Running bootstrapping for tag matrix... 2021-04-25 19:25:37
## >> Running bootstrapping for tag matrix... 2021-04-25 19:26:30
## >> Running bootstrapping for tag matrix... 2021-04-25 19:28:04
## >> Running bootstrapping for tag matrix... 2021-04-25 19:30:10
peakAnnoList = lapply(peak_files, annotatePeak, TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene, tssRegion = c(-3000, 3000), verbose = FALSE)
ChIPseeker::plotAnnoBar(peakAnnoList)genes = lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)
names(genes) = sub("_", "\n", names(genes))
compKEGG = clusterProfiler::compareCluster(geneCluster = genes,
fun = "enrichKEGG",
pvalueCutoff = 0.05,
pAdjustMethod = "BH")
clusterProfiler::dotplot(compKEGG, showCategory = 15, title = "KEGG Pathway Enrichment Analysis")Without Active Motif
peaks = list("Diagenode (1:100)" = Diagenode_100x,
"Diagenode (1:50)" = Diagenode_50x,
"Abcam 177178 (1:100)" = Abcam_ab177178,
"Abcam 4729 (1:100)" = Abcam_ab4729)
peakAnnoList = lapply(peaks, annotatePeak, TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene, tssRegion = c(-3000, 3000), verbose = FALSE)
genes = lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)
vennplot(genes)Without Active Motif
peaks = list("Diagenode (1:100)" = GenomicRanges::GRanges(Diagenode_100x),
"Diagenode (1:50)" = GenomicRanges::GRanges(Diagenode_50x),
"Abcam 177178 (1:100)" = GenomicRanges::GRanges(Abcam_ab177178),
"Abcam 4729 (1:100)" = GenomicRanges::GRanges(Abcam_ab4729))
vennplot(peaks)## Motif.Name
## 1 Nrf2(bZIP)/Lymphoblast-Nrf2-ChIP-Seq(GSE37589)/Homer
## 2 NF-E2(bZIP)/K562-NFE2-ChIP-Seq(GSE31477)/Homer
## 3 CEBP:CEBP(bZIP)/MEF-Chop-ChIP-Seq(GSE35681)/Homer
## 4 Bach1(bZIP)/K562-Bach1-ChIP-Seq(GSE31477)/Homer
## 5 GATA:SCL(Zf,bHLH)/Ter119-SCL-ChIP-Seq(GSE18720)/Homer
## 6 Oct4:Sox17(POU,Homeobox,HMG)/F9-Sox17-ChIP-Seq(GSE44553)/Homer
## Consensus P.value Log.P.value q.value..Benjamini.
## 1 HTGCTGAGTCAT 1e-18 -43.11 0
## 2 GATGACTCAGCA 1e-16 -37.77 0
## 3 NTNATGCAAYMNNHTGMAAY 1e-16 -37.66 0
## 4 AWWNTGCTGAGTCAT 1e-12 -28.15 0
## 5 CRGCTGBNGNSNNSAGATAA 1e-12 -28.02 0
## 6 CCATTGTATGCAAAT 1e-10 -24.87 0
## X..of.Target.Sequences.with.Motif.of.3211. X..of.Target.Sequences.with.Motif
## 1 114 3.55%
## 2 121 3.77%
## 3 272 8.47%
## 4 102 3.18%
## 5 208 6.48%
## 6 193 6.01%
## X..of.Background.Sequences.with.Motif.of.45156.
## 1 609.6
## 2 719.1
## 3 2238.8
## 4 648.9
## 5 1729.6
## 6 1628.2
## X..of.Background.Sequences.with.Motif
## 1 1.35%
## 2 1.60%
## 3 4.97%
## 4 1.44%
## 5 3.84%
## 6 3.61%
## Motif.Name Consensus P.value
## 1 Elk4(ETS)/Hela-Elk4-ChIP-Seq(GSE31477)/Homer NRYTTCCGGY 1e-74
## 2 Elk1(ETS)/Hela-Elk1-ChIP-Seq(GSE31477)/Homer HACTTCCGGY 1e-72
## 3 YY1(Zf)/Promoter/Homer CAAGATGGCGGC 1e-54
## 4 ETS(ETS)/Promoter/Homer AACCGGAAGT 1e-52
## 5 ELF1(ETS)/Jurkat-ELF1-ChIP-Seq(SRA014231)/Homer AVCCGGAAGT 1e-52
## 6 GFY-Staf(?,Zf)/Promoter/Homer RACTACAATTCCCAGAAKGC 1e-51
## Log.P.value q.value..Benjamini. X..of.Target.Sequences.with.Motif.of.8040.
## 1 -170.8 0 3970
## 2 -166.2 0 3967
## 3 -125.2 0 851
## 4 -121.9 0 2446
## 5 -120.7 0 3625
## 6 -119.7 0 618
## X..of.Target.Sequences.with.Motif
## 1 49.38%
## 2 49.34%
## 3 10.58%
## 4 30.42%
## 5 45.09%
## 6 7.69%
## X..of.Background.Sequences.with.Motif.of.39503.
## 1 15528.8
## 2 15568.3
## 3 2382.5
## 4 9071.4
## 5 14517.7
## 6 1564.5
## X..of.Background.Sequences.with.Motif
## 1 39.31%
## 2 39.41%
## 3 6.03%
## 4 22.96%
## 5 36.75%
## 6 3.96%
## Motif.Name Consensus P.value
## 1 Bach1(bZIP)/K562-Bach1-ChIP-Seq(GSE31477)/Homer AWWNTGCTGAGTCAT 1e-88
## 2 ETS(ETS)/Promoter/Homer AACCGGAAGT 1e-86
## 3 GFY-Staf(?,Zf)/Promoter/Homer RACTACAATTCCCAGAAKGC 1e-83
## 4 YY1(Zf)/Promoter/Homer CAAGATGGCGGC 1e-83
## 5 Elk1(ETS)/Hela-Elk1-ChIP-Seq(GSE31477)/Homer HACTTCCGGY 1e-77
## 6 NF-E2(bZIP)/K562-NFE2-ChIP-Seq(GSE31477)/Homer GATGACTCAGCA 1e-74
## Log.P.value q.value..Benjamini. X..of.Target.Sequences.with.Motif.of.11995.
## 1 -202.9 0 890
## 2 -198.3 0 4787
## 3 -193.2 0 1105
## 4 -191.9 0 1537
## 5 -178.7 0 7099
## 6 -170.9 0 936
## X..of.Target.Sequences.with.Motif
## 1 7.42%
## 2 39.91%
## 3 9.21%
## 4 12.82%
## 5 59.19%
## 6 7.80%
## X..of.Background.Sequences.with.Motif.of.35235.
## 1 1257.9
## 2 11059.8
## 3 1736.7
## 4 2708.3
## 5 17861.8
## 6 1444.0
## X..of.Background.Sequences.with.Motif
## 1 3.57%
## 2 31.38%
## 3 4.93%
## 4 7.68%
## 5 50.68%
## 6 4.10%
## Motif.Name Consensus P.value
## 1 NF-E2(bZIP)/K562-NFE2-ChIP-Seq(GSE31477)/Homer GATGACTCAGCA 1e-111
## 2 Nrf2(bZIP)/Lymphoblast-Nrf2-ChIP-Seq(GSE37589)/Homer HTGCTGAGTCAT 1e-96
## 3 Bach1(bZIP)/K562-Bach1-ChIP-Seq(GSE31477)/Homer AWWNTGCTGAGTCAT 1e-94
## 4 Jun-AP1(bZIP)/K562-cJun-ChIP-Seq(GSE31477)/Homer GATGASTCATCN 1e-91
## 5 Gata1(Zf)/K562-GATA1-ChIP-Seq(GSE18829)/Homer SAGATAAGRV 1e-91
## 6 Bach2(bZIP)/OCILy7-Bach2-ChIP-Seq(GSE44420)/Homer TGCTGAGTCA 1e-88
## Log.P.value q.value..Benjamini. X..of.Target.Sequences.with.Motif.of.16900.
## 1 -256.0 0 1085
## 2 -222.8 0 912
## 3 -218.4 0 889
## 4 -210.9 0 3258
## 5 -210.5 0 5697
## 6 -203.5 0 2837
## X..of.Target.Sequences.with.Motif
## 1 6.42%
## 2 5.39%
## 3 5.26%
## 4 19.27%
## 5 33.69%
## 6 16.78%
## X..of.Background.Sequences.with.Motif.of.33092.
## 1 1002.7
## 2 828.0
## 3 805.7
## 4 4509.5
## 5 8809.8
## 6 3832.2
## X..of.Background.Sequences.with.Motif
## 1 3.03%
## 2 2.50%
## 3 2.43%
## 4 13.62%
## 5 26.61%
## 6 11.58%
## Motif.Name Consensus P.value
## 1 GFY-Staf(?,Zf)/Promoter/Homer RACTACAATTCCCAGAAKGC 1e-157
## 2 YY1(Zf)/Promoter/Homer CAAGATGGCGGC 1e-99
## 3 ETS(ETS)/Promoter/Homer AACCGGAAGT 1e-80
## 4 Bach1(bZIP)/K562-Bach1-ChIP-Seq(GSE31477)/Homer AWWNTGCTGAGTCAT 1e-79
## 5 Ronin(THAP)/ES-Thap11-ChIP-Seq(GSE51522)/Homer RACTACAACTCCCAGVAKGC 1e-72
## 6 NF-E2(bZIP)/K562-NFE2-ChIP-Seq(GSE31477)/Homer GATGACTCAGCA 1e-68
## Log.P.value q.value..Benjamini. X..of.Target.Sequences.with.Motif.of.13519.
## 1 -362.3 0 1556
## 2 -228.0 0 1959
## 3 -185.8 0 5838
## 4 -182.5 0 1097
## 5 -165.9 0 848
## 6 -158.2 0 1193
## X..of.Target.Sequences.with.Motif
## 1 11.51%
## 2 14.49%
## 3 43.17%
## 4 8.11%
## 5 6.27%
## 6 8.82%
## X..of.Background.Sequences.with.Motif.of.33620.
## 1 1859.2
## 2 2987.6
## 3 11843.8
## 4 1480.4
## 5 1075.9
## 6 1734.5
## X..of.Background.Sequences.with.Motif
## 1 5.53%
## 2 8.88%
## 3 35.22%
## 4 4.40%
## 5 3.20%
## 6 5.16%
peakdir_M = "/Volumes/la420/home/CUTnTAG/antibodies/MACS2"
Diagenode_50x_path = file.path(peakdir_M, "Diagenode_50x_q0.05_peaks.narrowPeak")
Diagenode_50x = ChIPseeker::readPeakFile(Diagenode_50x_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)
Abcam_ab4729_path = file.path(peakdir_M, "Abcam-ab4729_q0.05_peaks.narrowPeak")
Abcam_ab4729 = ChIPseeker::readPeakFile(Abcam_ab4729_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)
Diagenode_100x_path = file.path(peakdir_M, "Diagenode_100x_q0.05_peaks.narrowPeak")
Diagenode_100x = ChIPseeker::readPeakFile(Diagenode_100x_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)
ActiveMotif_path = file.path(peakdir_M, "ActiveMotif_q0.05_peaks.narrowPeak")
ActiveMotif = ChIPseeker::readPeakFile(ActiveMotif_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)
Abcam_ab177178_path = file.path(peakdir_M, "Abcam-ab177178_q0.05_peaks.narrowPeak")
Abcam_ab177178 = ChIPseeker::readPeakFile(Abcam_ab177178_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)peakdir_M = "/Volumes/la420/home/CUTnTAG/antibodies/MACS2/"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729")
Total_CT_peaks = c()
for(sample in sampleList){
peakInfo_M = read.table(paste0(peakdir_M, sample, "_q0.05_peaks.narrowPeak"), header = FALSE, fill = TRUE) %>% mutate(width = abs(V3-V2))
Total_CT_peaks = data.frame(Total_CT_peaks = nrow(peakInfo_M), Antibody = sample) %>% rbind(Total_CT_peaks, .)
}
Total_CT_peaks %>% dplyr::select(Antibody, Total_CT_peaks)PeaksAlignmentSummary = left_join(Total_CT_peaks, alignDupSummary, by = "Antibody")
PeaksAlignmentSummary = PeaksAlignmentSummary[c("Antibody", "SequencingDepth", "MappedFragNum_hg19", "UniqueFragNum", "DuplicationRate", "Total_CT_peaks")]
PeaksAlignmentSummaryggplot(PeaksAlignmentSummary, aes(x = SequencingDepth, y = Total_CT_peaks)) + geom_point(aes(size = DuplicationRate)) + geom_text(aes(label = Antibody), nudge_y = 500, hjust = "inward")samples = list(ActiveMotif, Diagenode_100x, Diagenode_50x, Abcam_ab177178, Abcam_ab4729)
for(sample in samples){
print(GenomicRanges::width(sample) %>% summary())
}## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 178.0 207.0 257.0 321.1 360.0 3558.0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 183.0 233.0 315.0 368.4 442.0 2198.0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 206.0 279.0 402.0 503.2 617.0 4230.0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 193 224 277 328 380 1770
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 223.0 333.0 491.0 610.4 773.0 3967.0
datadir="/Volumes/la420/home/CUTnTAG/antibodies"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729")
inPeakData = c()
for(sample in sampleList){
peakRes = read.table(paste0(datadir, "/MACS2/", sample, "_q0.05_peaks.narrowPeak"), header = FALSE, fill = TRUE)
peak.gr = GenomicRanges::GRanges(seqnames = peakRes$V1, IRanges(start = peakRes$V2, end = peakRes$V3), strand = "*")
bamFile = paste0(datadir, "/bam/", sample, "_bowtie2_rmDup.mapped.bam")
fragment_counts = getCounts(bamFile, peak.gr, paired = TRUE, by_rg = FALSE, format = "bam")
inPeakN = counts(fragment_counts)[,1] %>% sum
inPeakData = rbind(inPeakData, data.frame(inPeakN = inPeakN, Antibody = sample))
}
frip = left_join(alignDupSummary, inPeakData, by = "Antibody") %>% mutate(frip = inPeakN/UniqueFragNum * 100)
frip %>% dplyr::select(Antibody, SequencingDepth, MappedFragNum_hg19, AlignmentRate_hg19, UniqueFragNum, FragInPeakNum = inPeakN, FRiPs = frip)Remove largest peaks (in all cases, under 50 peaks per sample):
ActiveMotif_adj = data.frame(ActiveMotif)
ActiveMotif_adj = ActiveMotif_adj[ActiveMotif_adj$V5 < 1500, ] %>% GenomicRanges::GRanges()
Diagenode_100x_adj = data.frame(Diagenode_100x)
Diagenode_100x_adj = Diagenode_100x_adj[Diagenode_100x_adj$V5 < 1500, ] %>% GenomicRanges::GRanges()
Diagenode_50x_adj = data.frame(Diagenode_50x)
Diagenode_50x_adj = Diagenode_50x_adj[Diagenode_50x_adj$V5 < 1500, ] %>% GenomicRanges::GRanges()
Abcam_ab4729_adj = data.frame(Abcam_ab4729)
Abcam_ab4729_adj = Abcam_ab4729_adj[Abcam_ab4729_adj$V5 < 1500, ] %>% GenomicRanges::GRanges()
Abcam_ab177178_adj = data.frame(Abcam_ab177178)
Abcam_ab177178_adj = Abcam_ab177178_adj[Abcam_ab177178_adj$V5 < 1500, ] %>% GenomicRanges::GRanges()ChIPseeker::covplot(peak = ActiveMotif_adj,
weightCol = "V5",
title = "CUT&TAG ActiveMotif (1:100) H3K27ac peaks (MACS2)")ChIPseeker::covplot(peak = Diagenode_100x_adj,
weightCol = "V5",
title = "CUT&TAG Diagenode (1:100) H3K27ac peaks (MACS2)")ChIPseeker::covplot(peak = Diagenode_50x_adj,
weightCol = "V5",
title = "CUT&TAG Diagenode (1:50) H3K27ac peaks (MACS2)")Discard values of 0 (corresponding to peak ranges with 0 ENCODE overlaps) and count how many ranges (peaks) remain:
peak_list = list(ActiveMotif, Diagenode_100x, Diagenode_50x, Abcam_ab177178, Abcam_ab4729)
total_overlapping_peaks_list = c()
for(i in 1:5){
overlaps = GenomicRanges::countOverlaps(query = peak_list[[i]], subject = ENCODE_narrowPeaks)
CT_peaks_in_ENCODE = overlaps[overlaps != 0] %>% length()
total_overlapping_peaks_list = c(total_overlapping_peaks_list, CT_peaks_in_ENCODE)
}
Total_CT_peaks_falling_into_ENCODE = data.frame(Antibody = sampleList, Total_CT_peaks_falling_into_ENCODE = total_overlapping_peaks_list)
Total_CT_peaks_falling_into_ENCODEDiscard values of 0 (corresponding to peak ranges with 0 C&T overlaps) and count how many ranges (peaks) remain:
peak_list = list(ActiveMotif, Diagenode_100x, Diagenode_50x, Abcam_ab177178, Abcam_ab4729)
captured_ENCODE_list = c()
for(i in 1:5){
overlaps = GenomicRanges::countOverlaps(query = ENCODE_narrowPeaks, subject = peak_list[[i]])
ENCODE_captured_peaks = overlaps[overlaps != 0] %>% length()
captured_ENCODE_list = c(captured_ENCODE_list, ENCODE_captured_peaks)
}
Total_captured_ENCODE_peaks = data.frame(Antibody = sampleList, Total_captured_ENCODE_peaks = captured_ENCODE_list)
Total_captured_ENCODE_peaksENCODE_overlaps = Total_captured_ENCODE_peaks %>% left_join(Total_CT_peaks, by = "Antibody") %>% left_join(Total_CT_peaks_falling_into_ENCODE, by = "Antibody")
ENCODE_overlaps$Percentage_of_CT_peaks_falling_into_ENCODE = ENCODE_overlaps$Total_CT_peaks_falling_into_ENCODE / ENCODE_overlaps$Total_CT_peaks * 100
ENCODE_overlaps$Percentage_of_total_ENCODE = ENCODE_overlaps$Total_captured_ENCODE_peaks / length(ENCODE_narrowPeaks) * 100
ENCODE_overlaps = ENCODE_overlaps[c("Antibody", "Total_CT_peaks", "Total_CT_peaks_falling_into_ENCODE", "Percentage_of_CT_peaks_falling_into_ENCODE", "Total_captured_ENCODE_peaks", "Percentage_of_total_ENCODE")]
ENCODE_overlapsENCODE_overlaps$Antibody = factor(ENCODE_overlaps$Antibody, levels = ENCODE_overlaps$Antibody[order(ENCODE_overlaps$Percentage_of_total_ENCODE)])
#x_labels = c("Active Motif (1:100)", "Abcam 177178 (1:100)", "Diagenode (1:100)", "Diagenode (1:50)", "Abcam 4729 (1:100)")
ENCODE_overlap_plot = ggplot(ENCODE_overlaps) +
geom_col(aes(x = Antibody, y = 100), fill = "grey90") +
geom_col(aes(x = reorder(Antibody, Percentage_of_total_ENCODE), y = Percentage_of_total_ENCODE, fill = Antibody), show.legend = FALSE) +
ylim(0, 100) +
ylab("% of ENCODE peaks captured") +
xlab("Antibody") +
#scale_x_discrete(labels = x_labels) +
ggtitle("ENCODE peaks captured by C&T") +
theme_bw()
ENCODE_overlap_plotAbcam_ab4729_df = data.frame(Abcam_ab4729)
Abcam_ab4729_IRanges = IRanges(start = Abcam_ab4729_df$start, Abcam_ab4729_df$end)
ENCODE_narrowPeaks_IRanges = IRanges(start = ENCODE_narrowPeaks_df$start, ENCODE_narrowPeaks_df$end)
GenometriCorr::VisualiseTwoIRanges(Abcam_ab4729_IRanges, ENCODE_narrowPeaks_IRanges, nameA = "Abcam_ab4729", nameB = "ENCODE_narrowPeaks")ENCODE_overlaps$Antibody = factor(ENCODE_overlaps$Antibody, levels = ENCODE_overlaps$Antibody[order(ENCODE_overlaps$Percentage_of_total_ENCODE)])
#x_labels = c("Active Motif (1:100)", "Diagenode (1:100)", "Diagenode (1:50)", "Abcam 177178 (1:100)", "Abcam 4729 (1:100)")
Peaks_in_ENCODE_plot = ggplot(ENCODE_overlaps) +
geom_col(aes(x = Antibody, y = 100), fill = "grey90") +
geom_col(aes(x = reorder(Antibody, Percentage_of_total_ENCODE), y = Percentage_of_CT_peaks_falling_into_ENCODE, fill = Antibody), show.legend = FALSE) +
ylim(0, 100) +
ylab("% of sample peaks falling into ENCODE") +
xlab("Antibody") +
#scale_x_discrete(labels = x_labels) +
ggtitle("Proportion of C&T peaks falling into ENCODE") +
theme_bw()
Peaks_in_ENCODE_plotpromoters = ChIPseeker::getPromoters(TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene, upstream = 3000, downstream = 3000)
peak_files = list("Active Motif (1:100)" = ActiveMotif_path,
"Diagenode (1:100)" = Diagenode_100x_path,
"Diagenode (1:50)" = Diagenode_50x_path,
"Abcam 177178 (1:100)" = Abcam_ab177178_path,
"Abcam 4729 (1:100)" = Abcam_ab4729_path,
"ENCODE narrow peaks" = ENCODE_narrowPeaks)
tagMatrixList = lapply(peak_files, getTagMatrix, windows = promoters)
ChIPseeker::plotAvgProf(tagMatrixList, xlim = c(-3000, 3000), conf = 0.95, resample = 500, facet = "row")## >> Running bootstrapping for tag matrix... 2021-04-25 20:10:56
## >> Running bootstrapping for tag matrix... 2021-04-25 20:11:37
## >> Running bootstrapping for tag matrix... 2021-04-25 20:13:18
## >> Running bootstrapping for tag matrix... 2021-04-25 20:13:43
## >> Running bootstrapping for tag matrix... 2021-04-25 20:16:17
## >> Running bootstrapping for tag matrix... 2021-04-25 20:19:45
peakAnnoList = lapply(peak_files, annotatePeak, TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene, tssRegion = c(-3000, 3000), verbose = FALSE)
ChIPseeker::plotAnnoBar(peakAnnoList)genes = lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)
names(genes) = sub("_", "\n", names(genes))
compKEGG = clusterProfiler::compareCluster(geneCluster = genes,
fun = "enrichKEGG",
pvalueCutoff = 0.05,
pAdjustMethod = "BH")
clusterProfiler::dotplot(compKEGG, showCategory = 15, title = "KEGG Pathway Enrichment Analysis")Without Active Motif
peak_files = list("Diagenode (1:100)" = Diagenode_100x,
"Diagenode (1:50)" = Diagenode_50x,
"Abcam 177178 (1:100)" = Abcam_ab177178,
"Abcam 4729 (1:100)" = Abcam_ab4729)
peakAnnoList = lapply(peak_files, annotatePeak, TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene, tssRegion = c(-3000, 3000), verbose = FALSE)
genes = lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)
ChIPseeker::vennplot(genes)Without Active Motif
peaks = list("Diagenode (1:100)" = GenomicRanges::GRanges(Diagenode_100x),
"Diagenode (1:50)" = GenomicRanges::GRanges(Diagenode_50x),
"Abcam 177178 (1:100)" = GenomicRanges::GRanges(Abcam_ab177178),
"Abcam 4729 (1:100)" = GenomicRanges::GRanges(Abcam_ab4729))
vennplot(peaks)## Motif.Name Consensus
## 1 ZNF652/HepG2-ZNF652.Flag-ChIP-Seq(Encode)/Homer TTAACCCTTTVNKKN
## 2 Phox2b(Homeobox)/CLBGA-PHOX2B-ChIP-Seq(GSE90683)/Homer TTAATTNAATTA
## 3 Ronin(THAP)/ES-Thap11-ChIP-Seq(GSE51522)/Homer RACTACAACTCCCAGVAKGC
## 4 Sp1(Zf)/Promoter/Homer GGCCCCGCCCCC
## 5 GFY-Staf(?,Zf)/Promoter/Homer RACTACAATTCCCAGAAKGC
## 6 Nrf2(bZIP)/Lymphoblast-Nrf2-ChIP-Seq(GSE37589)/Homer HTGCTGAGTCAT
## P.value Log.P.value q.value..Benjamini.
## 1 1e-10 -25.29 0.0000
## 2 1e-09 -20.91 0.0000
## 3 1e-09 -20.87 0.0000
## 4 1e-07 -17.17 0.0000
## 5 1e-06 -15.74 0.0000
## 6 1e-04 -11.00 0.0012
## X..of.Target.Sequences.with.Motif.of.2618. X..of.Target.Sequences.with.Motif
## 1 133 5.08%
## 2 110 4.20%
## 3 42 1.60%
## 4 315 12.02%
## 5 50 1.91%
## 6 30 1.15%
## X..of.Background.Sequences.with.Motif.of.43372.
## 1 1166.7
## 2 969.0
## 3 230.4
## 4 3841.9
## 5 361.7
## 6 206.3
## X..of.Background.Sequences.with.Motif
## 1 2.69%
## 2 2.24%
## 3 0.53%
## 4 8.87%
## 5 0.84%
## 6 0.48%
## Motif.Name Consensus P.value
## 1 Gata1(Zf)/K562-GATA1-ChIP-Seq(GSE18829)/Homer SAGATAAGRV 1e-57
## 2 Gata2(Zf)/K562-GATA2-ChIP-Seq(GSE18829)/Homer BBCTTATCTS 1e-55
## 3 Gata6(Zf)/HUG1N-GATA6-ChIP-Seq(GSE51936)/Homer YCTTATCTBN 1e-55
## 4 Nrf2(bZIP)/Lymphoblast-Nrf2-ChIP-Seq(GSE37589)/Homer HTGCTGAGTCAT 1e-54
## 5 NF-E2(bZIP)/K562-NFE2-ChIP-Seq(GSE31477)/Homer GATGACTCAGCA 1e-54
## 6 Bach1(bZIP)/K562-Bach1-ChIP-Seq(GSE31477)/Homer AWWNTGCTGAGTCAT 1e-50
## Log.P.value q.value..Benjamini. X..of.Target.Sequences.with.Motif.of.7953.
## 1 -131.6 0 804
## 2 -128.1 0 864
## 3 -127.8 0 1087
## 4 -125.9 0 153
## 5 -125.3 0 173
## 6 -116.9 0 151
## X..of.Target.Sequences.with.Motif
## 1 10.11%
## 2 10.86%
## 3 13.67%
## 4 1.92%
## 5 2.18%
## 6 1.90%
## X..of.Background.Sequences.with.Motif.of.41178.
## 1 2285.4
## 2 2541.7
## 3 3442.5
## 4 161.9
## 5 206.5
## 6 169.2
## X..of.Background.Sequences.with.Motif
## 1 5.55%
## 2 6.18%
## 3 8.37%
## 4 0.39%
## 5 0.50%
## 6 0.41%
## Motif.Name Consensus P.value
## 1 p53(p53)/mES-cMyc-ChIP-Seq(GSE11431)/Homer ACATGCCCGGGCAT 1e-192
## 2 NF-E2(bZIP)/K562-NFE2-ChIP-Seq(GSE31477)/Homer GATGACTCAGCA 1e-169
## 3 Bach1(bZIP)/K562-Bach1-ChIP-Seq(GSE31477)/Homer AWWNTGCTGAGTCAT 1e-163
## 4 Nrf2(bZIP)/Lymphoblast-Nrf2-ChIP-Seq(GSE37589)/Homer HTGCTGAGTCAT 1e-153
## 5 NFE2L2(bZIP)/HepG2-NFE2L2-ChIP-Seq(Encode)/Homer AWWWTGCTGAGTCAT 1e-151
## 6 Bach2(bZIP)/OCILy7-Bach2-ChIP-Seq(GSE44420)/Homer TGCTGAGTCA 1e-147
## Log.P.value q.value..Benjamini. X..of.Target.Sequences.with.Motif.of.24167.
## 1 -442.2 0 580
## 2 -391.3 0 680
## 3 -376.2 0 628
## 4 -354.3 0 594
## 5 -348.6 0 538
## 6 -340.2 0 1545
## X..of.Target.Sequences.with.Motif
## 1 2.40%
## 2 2.81%
## 3 2.60%
## 4 2.46%
## 5 2.23%
## 6 6.39%
## X..of.Background.Sequences.with.Motif.of.25423.
## 1 132.2
## 2 198.9
## 3 177.7
## 4 168.2
## 5 142.6
## 6 789.9
## X..of.Background.Sequences.with.Motif
## 1 0.52%
## 2 0.78%
## 3 0.70%
## 4 0.66%
## 5 0.56%
## 6 3.10%
## Motif.Name Consensus P.value
## 1 Bach1(bZIP)/K562-Bach1-ChIP-Seq(GSE31477)/Homer AWWNTGCTGAGTCAT 1e-79
## 2 NF-E2(bZIP)/K562-NFE2-ChIP-Seq(GSE31477)/Homer GATGACTCAGCA 1e-77
## 3 Nrf2(bZIP)/Lymphoblast-Nrf2-ChIP-Seq(GSE37589)/Homer HTGCTGAGTCAT 1e-73
## 4 Gata6(Zf)/HUG1N-GATA6-ChIP-Seq(GSE51936)/Homer YCTTATCTBN 1e-66
## 5 Gata1(Zf)/K562-GATA1-ChIP-Seq(GSE18829)/Homer SAGATAAGRV 1e-65
## 6 Gata2(Zf)/K562-GATA2-ChIP-Seq(GSE18829)/Homer BBCTTATCTS 1e-63
## Log.P.value q.value..Benjamini. X..of.Target.Sequences.with.Motif.of.7348.
## 1 -183.3 0 194
## 2 -177.8 0 231
## 3 -169.7 0 196
## 4 -153.8 0 1014
## 5 -152.0 0 779
## 6 -145.9 0 826
## X..of.Target.Sequences.with.Motif
## 1 2.64%
## 2 3.15%
## 3 2.67%
## 4 13.81%
## 5 10.61%
## 6 11.25%
## X..of.Background.Sequences.with.Motif.of.42500.
## 1 195.5
## 2 288.6
## 3 217.2
## 4 3309.5
## 5 2313.8
## 6 2547.7
## X..of.Background.Sequences.with.Motif
## 1 0.46%
## 2 0.68%
## 3 0.51%
## 4 7.84%
## 5 5.48%
## 6 6.04%
## Motif.Name Consensus P.value
## 1 p53(p53)/mES-cMyc-ChIP-Seq(GSE11431)/Homer ACATGCCCGGGCAT 0e+00
## 2 Gata2(Zf)/K562-GATA2-ChIP-Seq(GSE18829)/Homer BBCTTATCTS 0e+00
## 3 Gata4(Zf)/Heart-Gata4-ChIP-Seq(GSE35151)/Homer NBWGATAAGR 0e+00
## 4 FOXP1(Forkhead)/H9-FOXP1-ChIP-Seq(GSE31006)/Homer NYYTGTTTACHN 1e-302
## 5 Gata6(Zf)/HUG1N-GATA6-ChIP-Seq(GSE51936)/Homer YCTTATCTBN 1e-286
## 6 Gata1(Zf)/K562-GATA1-ChIP-Seq(GSE18829)/Homer SAGATAAGRV 1e-270
## Log.P.value q.value..Benjamini. X..of.Target.Sequences.with.Motif.of.30028.
## 1 -884.3 0 1063
## 2 -774.2 0 5936
## 3 -754.8 0 8176
## 4 -697.7 0 4284
## 5 -659.2 0 7279
## 6 -622.7 0 5194
## X..of.Target.Sequences.with.Motif
## 1 3.54%
## 2 19.75%
## 3 27.20%
## 4 14.25%
## 5 24.22%
## 6 17.28%
## X..of.Background.Sequences.with.Motif.of.29743.
## 1 208.2
## 2 3512.9
## 3 5373.7
## 4 2335.7
## 5 4774.7
## 6 3129.1
## X..of.Background.Sequences.with.Motif
## 1 0.70%
## 2 11.84%
## 3 18.12%
## 4 7.87%
## 5 16.10%
## 6 10.55%